import requests
from lxml import etree
from fake_useragent import UserAgent

# Request headers shared by every page fetch: a randomized User-Agent
# (picked once at import time) to reduce the chance of being blocked.
headers = {
    "User-Agent": UserAgent().random
}


def spider(i):
    """Scrape page *i* of the Maoyan Top-100 movie board and append results.

    Each page holds 10 movies; ``i`` is the zero-based page index. For every
    movie the rank, title, starring line and poster URL are printed and
    appended to ``电影.txt``.

    :param i: zero-based page index (offset = 10 * i).
    :raises requests.HTTPError: if the board page returns an error status.
    """
    url = 'https://maoyan.com/board/4?offset=' + str(10 * i)
    # Timeout prevents hanging forever; raise_for_status avoids silently
    # parsing an error/anti-bot page as if it were the movie board.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    html = etree.HTML(response.content.decode("utf-8"))
    datas = html.xpath('//*[@id="app"]/div/div/div[1]/dl/dd')
    # Open the output file once per page, not once per movie.
    with open('电影.txt', 'a+', encoding='utf-8') as f:
        for a, data in enumerate(datas):
            data_title = data.xpath('div/div/div[1]/p[1]/a/@title')  # movie title
            data_info = data.xpath('div/div/div[1]/p[2]/text()')  # starring line
            data_src = data.xpath('a/img[2]/@data-src')  # poster image URL
            # Skip malformed entries instead of crashing with IndexError
            # when an xpath query matches nothing.
            if not (data_title and data_info and data_src):
                continue
            rank = i * 10 + a + 1
            print("No: " + str(rank))  # brief console listing
            print(data_title[0], data_src[0])
            f.write("No: " + str(rank) + '\n')  # rank
            f.write(data_title[0] + '\n')  # movie title
            f.write(str(data_info[0]).strip() + '\n')  # starring
            f.write(data_src[0] + '\n')  # poster URL
            f.write('\n' * 3)


if __name__ == '__main__':
    # Ask how many pages of the board to scrape, then fetch them in order.
    page_count = int(input("爬取的页数:"))
    for page_index in range(page_count):
        spider(page_index)
